from moviepy.editor import ImageClip,AudioFileClip,concatenate_videoclips,CompositeVideoClip,TextClip
# Build a narrated slideshow: slide N shows input/N.png for as long as N.wav
# lasts, with the N-th entry of `subtitles` drawn at the bottom centre, and
# the result is written to result2.mp4.
res = []        # one image clip per slide, each with its narration attached
text_clip = []  # one subtitle clip per slide, positioned on the final timeline
fontsize = 50
white = (255, 255, 255)  # NOTE(review): not referenced anywhere in this section
audios = []     # every opened AudioFileClip, kept so they can be closed below
subtitles = ['line1', 'line2', 'line3']
font_path = "/System/Library/Fonts/Supplemental/Arial Unicode.ttf"

# Start time of the current slide on the final timeline: the running total of
# the durations of all previous narrations.
s = 0
try:
    # The slide count follows `subtitles`, so adding or removing a subtitle
    # cannot drift out of step with a separately hard-coded range.
    for i, subtitle in enumerate(subtitles, start=1):
        v1 = ImageClip('input/{}.png'.format(i))
        a = AudioFileClip('{}.wav'.format(i))
        # Register the reader immediately so it is closed even if a later
        # step of this iteration raises.
        audios.append(a)
        v1 = v1.set_duration(a.duration)
        v1 = v1.set_audio(a)
        res.append(v1)
        print('start', s)
        txt_clip1 = (
            TextClip(subtitle, font=font_path, fontsize=fontsize, color='black', method='label')
                .set_position(("center", "bottom"))
                .set_duration(a.duration)
                # The subtitle is overlaid on the concatenated video, so it
                # needs an absolute start time rather than a per-slide one.
                .set_start(s)
        )
        text_clip.append(txt_clip1)
        s += a.duration

    # Slides play back to back; the subtitles are composited on top of them.
    result = concatenate_videoclips(res)
    clip = CompositeVideoClip([result] + text_clip)
    clip.write_videofile('result2.mp4', audio_codec='aac', fps=10)
finally:
    # Release the audio readers whether or not rendering succeeded.
    for opened_audio in audios:
        opened_audio.close()
